require 'config/environment'

# Rake task: build a word-frequency list from a text file using MeCab.
#
# Environment variables:
#   doc - path of the input text file (required; the error message asks for UTF-8)
#   out - path of the output file (required)
#   rs  - separator handed to CSV.open as its third argument (optional, tab by default)
#
# Every input line is parsed by MeCab with ChaSen-style output. A token is
# counted when the first "-"-separated component of its 4th tab-separated
# field is one of the parts of speech in pos_list. One row per distinct
# surface form is written as [surface, nil, frequency].
#
# Raises RuntimeError when "doc" or "out" is missing from the environment.
task :make_word_list do
  raise "IO Error: Please specify text filename (UTF-8)" unless ENV.has_key? "doc"
  raise "IO Error: Please specify output filename" unless ENV.has_key? "out"

  # Required inside the task so that merely loading this file does not pull
  # in these libraries.
  require 'set'
  require 'MeCab'
  require 'csv'

  # Parts of speech to keep: noun, adverb, adjective, verb.
  pos_list = Set.new ["名詞","副詞","形容詞","動詞"]
  results = Hash.new(0)

  # Build the tagger once; constructing it for every line is loop-invariant work.
  tagger = MeCab::Tagger.new("-Ochasen")

  #read data
  puts "read data"
  File.open(ENV["doc"], "r") do |file|
    file.each_with_index do |line, count|
      # Use non-destructive chomp: chomp! returns nil when there is no
      # trailing newline (e.g. the last line of the file), which would hand
      # nil to the parser.
      tagger.parse(line.chomp).split("\n").each do |token_line|
        # Skip only the exact end-of-sentence marker, not every token that
        # happens to contain the letters "EOS".
        next if token_line == "EOS"

        fields = token_line.split("\t")
        pos_field = fields[3]
        # Skip output lines that do not carry a 4th field.
        next if pos_field.nil?

        cur_pos = pos_field.split("-")[0]
        results[fields[0]] += 1 if pos_list.include?(cur_pos)
      end

      # Progress indicator: one dot per 1000 lines, a line break every
      # 100 dots.
      if (count % 1000) == 0
        puts "" if (count / 1000) % 100 == 0
        print "."
      end
    end
  end
  puts ""

  #save data
  separator = ENV.fetch("rs", "\t")
  # NOTE(review): a bare separator as the third positional argument is the
  # legacy (Ruby 1.8) CSV.open signature; the current CSV library expects a
  # `col_sep` option instead. Confirm the target Ruby version before changing.
  # The block form closes the output file even if writing a row raises.
  CSV.open(ENV["out"], "w", separator) do |csv|
    puts "save data"
    results.each do |surface, frequency|
      csv << [surface, nil, frequency]
    end
  end
end